The goal of this script is to generate a Seurat object for sample GSM3717037.
LogNormalize, then doublets
detection using scran hybrid and scDblFinder
method (but not filtering)LogNormalize, for only the remaining
cellsPCAtSNE and UMAPlibrary(dplyr)
library(patchwork)
library(ggplot2)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
out_dir = "."
We load the parameters :
sample_name = params$sample_name # "GSM3717034"
# sample_name = "GSM3717034"
Input count matrix is there :
count_matrix_dir = paste0(out_dir, "/input/", sample_name, "/")
count_matrix_file = list.files(count_matrix_dir, full.names = TRUE)
count_matrix_file
## [1] "./input/GSM3717037//GSM3717037_10X1data.HFU13.tsv.gz"
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_cell_markers.rds"))
cell_markers = lapply(cell_markers, FUN = toupper)
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 13 13 9 10
## B cells cuticle cortex medulla
## 16 15 16 10
## IRS proliferative IBL ORS
## 16 20 15 16
## IFE HFSC melanocytes sebocytes
## 17 17 10 8
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = lapply(dotplot_markers, FUN = toupper)
dotplot_markers
## $`CD4 T cells`
## [1] "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5"
##
## $IBL
## [1] "KRT16" "KRT6C"
##
## $ORS
## [1] "KRT15" "GPX2"
##
## $IFE
## [1] "SPINK5" "LY6D"
##
## $HFSC
## [1] "DIO2" "TCEAL2"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
We load metadata for this sample :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/takahashi_sample_info.rds"))
sample_info %>%
dplyr::filter(project_name == sample_name)
## project_name sample_type sample_identifier color
## 1 GSM3717037 Takahashi_HD Takahashi_HD_4 darkolivegreen
These is a parameter for different functions :
cl = aquarius::create_parallel_instance(nthreads = 3L)
cut_log_nCount_RNA = 0.5 # almost no filter
cut_nFeature_RNA = 20 # almost no filter
cut_percent.mt = 20
cut_percent.rb = 50
In this section, we load the count matrix.
mat = read.table(count_matrix_file,
header = TRUE, row.names = 1)
# For the two 10X data, this is required
rownames(mat) = stringr::str_remove(rownames(mat),
pattern = "hg19_")
# Seurat object
sobj = Seurat::CreateSeuratObject(counts = mat,
project = sample_name,
assay = "RNA")
rm(mat)
sobj
## An object of class Seurat
## 32738 features across 6000 samples within 1 assay
## Active assay: RNA (32738 features, 0 variable features)
(Time to run : 64.1 s)
We add the same columns as in metadata :
row_oi = (sample_info$project_name == sample_name)
sobj$project_name = sample_name
sobj$sample_identifier = sample_info[row_oi, "sample_identifier"]
sobj$sample_type = sample_info[row_oi, "sample_type"]
colnames(sobj@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "project_name" "sample_identifier" "sample_type"
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 32738 features across 6000 samples within 1 assay
## Active assay: RNA (32738 features, 3000 variable features)
We generate a tSNE to visualize cells before filtering.
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"),
check_duplicates = FALSE)
sobj
## An object of class Seurat
## 32738 features across 6000 samples within 1 assay
## Active assay: RNA (32738 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We annotate cells for cell type using
Seurat::AddModuleScore function.
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 86 32 69 54
## B cells cuticle cortex medulla
## 65 233 126 102
## IRS proliferative IBL ORS
## 190 240 756 744
## IFE HFSC melanocytes sebocytes
## 1561 1239 375 128
(Time to run : 31.63 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", "MSX2", "KRT16",
unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase using Seurat and
cyclone.
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 4991 478 443
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 3337 278 285
## G2M 761 160 83
## S 893 40 75
(Time to run : 467.45 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
In this section, we look at the number of genes expressed by each cell, the number of UMI, the percentage of mitochondrial genes expressed, and the percentage of ribosomal genes expressed. Then, without taking into account the cells expressing low number of genes or have low number of UMI, we identify doublet cells.
We compute four quality metrics :
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^MT", col.name = "percent.mt")
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^RP[L|S][0-9]*$", col.name = "percent.rb")
sobj$log_nCount_RNA = log(sobj$nCount_RNA)
head(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA project_name
## AAACCTGAGAAACCGC GSM3717037 1680 821 GSM3717037
## AAACCTGAGATATACG GSM3717037 139 112 GSM3717037
## AAACCTGCAAAGTCAA GSM3717037 9496 2666 GSM3717037
## AAACCTGCACGGCCAT GSM3717037 9837 2388 GSM3717037
## AAACCTGCAGCTGCAC GSM3717037 15679 3277 GSM3717037
## AAACCTGTCAACGGCC GSM3717037 154 136 GSM3717037
## sample_identifier sample_type score_CD4_T_cells
## AAACCTGAGAAACCGC Takahashi_HD_4 Takahashi_HD -0.04562790
## AAACCTGAGATATACG Takahashi_HD_4 Takahashi_HD 0.00000000
## AAACCTGCAAAGTCAA Takahashi_HD_4 Takahashi_HD -0.05167898
## AAACCTGCACGGCCAT Takahashi_HD_4 Takahashi_HD -0.05105475
## AAACCTGCAGCTGCAC Takahashi_HD_4 Takahashi_HD 0.03635953
## AAACCTGTCAACGGCC Takahashi_HD_4 Takahashi_HD 0.00000000
## score_CD8_T_cells score_Langerhans_cells score_macrophages
## AAACCTGAGAAACCGC -0.023382583 -0.01671626 -0.021117268
## AAACCTGAGATATACG 0.000000000 0.00000000 0.000000000
## AAACCTGCAAAGTCAA -0.033907911 -0.03393119 -0.022653710
## AAACCTGCACGGCCAT -0.032718647 -0.03883237 -0.024568747
## AAACCTGCAGCTGCAC 0.005699966 0.02285598 -0.015237188
## AAACCTGTCAACGGCC 0.000000000 -0.01142884 -0.009465922
## score_B_cells score_cuticle score_cortex score_medulla
## AAACCTGAGAAACCGC -0.02327732 -0.16888571 -0.10268153 -0.065967585
## AAACCTGAGATATACG 0.00000000 0.26509648 0.59596144 -0.004515443
## AAACCTGCAAAGTCAA -0.02031960 -0.08818440 -0.13095755 -0.088541109
## AAACCTGCACGGCCAT -0.01720601 -0.07301297 -0.12655003 -0.087404566
## AAACCTGCAGCTGCAC -0.01566195 -0.10465005 -0.05183572 -0.084598266
## AAACCTGTCAACGGCC 0.00000000 -0.04221001 -0.03619432 -0.008818253
## score_IRS score_proliferative score_IBL score_ORS
## AAACCTGAGAAACCGC -0.17210597 -0.06791198 0.1189465 0.29244213
## AAACCTGAGATATACG -0.03225920 -0.01198232 0.1811686 0.18997492
## AAACCTGCAAAGTCAA -0.14062855 -0.08836367 -0.2514209 0.69912468
## AAACCTGCACGGCCAT -0.06395347 -0.04368462 0.8356216 -0.40732036
## AAACCTGCAGCTGCAC -0.19713934 0.03724028 -0.1012079 -0.04019209
## AAACCTGTCAACGGCC -0.05455243 -0.02047534 0.2062756 -0.15128391
## score_IFE score_HFSC score_melanocytes score_sebocytes
## AAACCTGAGAAACCGC -0.20489837 0.27047211 -0.23033721 0.10534018
## AAACCTGAGATATACG -0.16856793 0.15495378 -0.06215034 -0.02152909
## AAACCTGCAAAGTCAA -0.14364016 0.33448522 -0.26051961 -0.12005067
## AAACCTGCACGGCCAT -0.34860765 -0.09168565 -0.05075498 -0.15163047
## AAACCTGCAGCTGCAC -0.06312317 -0.22288526 -0.32111717 -0.09937174
## AAACCTGTCAACGGCC -0.18136702 -0.09937170 -0.05416930 -0.01662678
## cell_type Seurat.Phase cyclone.Phase percent.mt percent.rb
## AAACCTGAGAAACCGC ORS G2M G1 7.9166667 6.666667
## AAACCTGAGATATACG cortex G1 G1 0.7194245 35.251799
## AAACCTGCAAAGTCAA ORS G1 S 2.5484414 21.282645
## AAACCTGCACGGCCAT IBL G2M G1 1.3418725 29.714344
## AAACCTGCAGCTGCAC IFE G2M G1 1.7411825 29.351362
## AAACCTGTCAACGGCC IBL G1 G1 5.1948052 12.987013
## log_nCount_RNA
## AAACCTGAGAAACCGC 7.426549
## AAACCTGAGATATACG 4.934474
## AAACCTGCAAAGTCAA 9.158626
## AAACCTGCACGGCCAT 9.193906
## AAACCTGCAGCTGCAC 9.660078
## AAACCTGTCAACGGCC 5.036953
We get the cell barcodes for the failing cells :
fail_percent.mt = sobj@meta.data %>% dplyr::filter(percent.mt > cut_percent.mt) %>% rownames()
fail_percent.rb = sobj@meta.data %>% dplyr::filter(percent.rb > cut_percent.rb) %>% rownames()
fail_log_nCount_RNA = sobj@meta.data %>% dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA) %>% rownames()
fail_nFeature_RNA = sobj@meta.data %>% dplyr::filter(nFeature_RNA < cut_nFeature_RNA) %>% rownames()
Without taking into account the low UMI and low number of features cells, we identify doublets.
fsobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA)))
fsobj
## An object of class Seurat
## 32738 features across 6000 samples within 1 assay
## Active assay: RNA (32738 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
On this filtered dataset, we apply doublet cells detection. Just before, we run the normalization, taking into account only the remaining cells.
fsobj = Seurat::NormalizeData(fsobj,
normalization.method = "LogNormalize",
assay = "RNA")
fsobj = Seurat::FindVariableFeatures(fsobj,
assay = "RNA",
nfeatures = 3000)
fsobj
## An object of class Seurat
## 32738 features across 6000 samples within 1 assay
## Active assay: RNA (32738 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We identify doublet cells :
fsobj = aquarius::find_doublets(sobj = fsobj,
BPPARAM = cl)
## [1] 32738 6000
##
## FALSE TRUE
## 4942 1058
## [12:12:22] WARNING: amalgamation/../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
##
## FALSE TRUE
## 4815 1185
##
## FALSE TRUE
## 4140 1860
fail_doublets_consensus = Seurat::WhichCells(fsobj, expression = doublets_consensus.class)
fail_doublets_scDblFinder = Seurat::WhichCells(fsobj, expression = scDblFinder.class)
fail_doublets_hybrid = Seurat::WhichCells(fsobj, expression = hybrid_score.class)
(Time to run : 124.09 s)
We add the information in the non filtered Seurat object :
sobj$doublets_consensus.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_consensus ~ TRUE,
!(colnames(sobj) %in% fail_doublets_consensus) ~ FALSE)
sobj$scDblFinder.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_scDblFinder ~ TRUE,
!(colnames(sobj) %in% fail_doublets_scDblFinder) ~ FALSE)
sobj$hybrid_score.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_hybrid ~ TRUE,
!(colnames(sobj) %in% fail_doublets_hybrid) ~ FALSE)
We can visualize the 4 cells quality with a Venn diagram :
n_filtered = c(fail_percent.mt, fail_percent.rb, fail_log_nCount_RNA, fail_nFeature_RNA) %>%
unique() %>% length()
percent_filtered = round(100*(n_filtered/ncol(sobj)), 2)
ggvenn::ggvenn(list(percent.mt = fail_percent.mt,
percent.rb = fail_percent.rb,
log_nCount_RNA = fail_log_nCount_RNA,
nFeature_RNA = fail_nFeature_RNA),
fill_color = c("#0073C2FF", "#EFC000FF", "orange", "pink"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Filtered out cells",
subtitle = paste0(n_filtered, " cells (", percent_filtered, " % of all cells)")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of UMI, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "log_nCount_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_log_nCount_RNA)
Seurat::VlnPlot(sobj, features = "log_nCount_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_log_nCount_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_log_nCount_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "log_nCount_RNA",
subtitle = paste0(length(fail_log_nCount_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of features, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "nFeature_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_nFeature_RNA)
Seurat::VlnPlot(sobj, features = "nFeature_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_nFeature_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_nFeature_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "nFeature_RNA",
subtitle = paste0(length(fail_nFeature_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for mitochondrial gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.mt",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.mt)
Seurat::VlnPlot(sobj, features = "percent.mt", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.mt, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.mt,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.mt",
subtitle = paste0(length(fail_percent.mt), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for ribosomal gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.rb",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.rb)
Seurat::VlnPlot(sobj, features = "percent.rb", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.rb, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.rb,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.rb",
subtitle = paste0(length(fail_percent.rb), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
We would like to see if the number of feature expressed by cell, and
the number of UMI is correlated with the cell type, the percentage of
mitochondrial and ribosomal gene expressed, and the doublet status. We
build the log_nCount_RNA by nFeature_RNA
figure, where cells (dots) are colored by these different metrics.
This is the figure, colored by cell type :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "cell_type",
col_colors = unname(color_markers),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of mitochondrial genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.mt",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of ribosomal genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.rb",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(doublets_consensus.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "doublets_consensus.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(scDblFinder.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "scDblFinder.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(hybrid_score.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "hybrid_score.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
Do filtered cells belong to a particular cell type ?
sobj$all_cells = TRUE
plot_list = list()
## All cells
df = sobj@meta.data
if (nrow(df) == 0) {
plot_list[[1]] = ggplot()
} else {
plot_list[[1]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "All cells",
subtitle = paste(nrow(df), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## Doublets consensus
df = sobj@meta.data %>%
dplyr::filter(doublets_consensus.class)
if (nrow(df) == 0) {
plot_list[[2]] = ggplot()
} else {
plot_list[[2]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "doublets_consensus.class",
subtitle = paste(sum(sobj$doublets_consensus.class, na.rm = TRUE), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.mt
df = sobj@meta.data %>%
dplyr::filter(percent.mt > cut_percent.mt)
if (nrow(df) == 0) {
plot_list[[3]] = ggplot()
} else {
plot_list[[3]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.mt >", cut_percent.mt),
subtitle = paste(length(fail_percent.mt), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.rb
df = sobj@meta.data %>%
dplyr::filter(percent.rb > cut_percent.rb)
if (nrow(df) == 0) {
plot_list[[4]] = ggplot()
} else {
plot_list[[4]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.rb >", cut_percent.rb),
subtitle = paste(length(fail_percent.rb), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## log_nCount_RNA
df = sobj@meta.data %>%
dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA)
if (nrow(df) == 0) {
plot_list[[5]] = ggplot()
} else {
plot_list[[5]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("log_nCount_RNA <", round(cut_log_nCount_RNA, 2)),
subtitle = paste(length(fail_log_nCount_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## nFeature_RNA
df = sobj@meta.data %>%
dplyr::filter(nFeature_RNA < cut_nFeature_RNA)
if (nrow(df) == 0) {
plot_list[[6]] = ggplot()
} else {
plot_list[[6]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("nFeature_RNA <", round(cut_nFeature_RNA, 2)),
subtitle = paste(length(fail_nFeature_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We can compare doublet detection methods with a Venn diagram :
ggvenn::ggvenn(list(hybrid = fail_doublets_hybrid,
scDblFinder = fail_doublets_scDblFinder),
fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::ggtitle(label = "Doublet cells") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
We visualize cells annotation for doublets :
plot_list = list()
# scDblFinder.class
sobj$fail = ifelse(sobj$scDblFinder.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "scDblFinder.class",
subtitle = paste0(sum(sobj$scDblFinder.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# hybrid_score.class
sobj$fail = ifelse(sobj$hybrid_score.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "hybrid_score.class",
subtitle = paste0(sum(sobj$hybrid_score.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
sobj$fail = NULL
# Plot
patchwork::wrap_plots(plot_list, nrow = 1)
What is the composition of doublet cells ? We just look at score for each cell type.
sobj$orig.ident.doublets = case_when(is.na(sobj$doublets_consensus.class) ~ "bad quality",
sobj$doublets_consensus.class == TRUE ~ paste0(sobj$orig.ident, " doublets"),
sobj$doublets_consensus.class == FALSE ~ "not doublet")
sobj$orig.ident.doublets = factor(sobj$orig.ident.doublets,
levels = c(paste0(as.character(sample_info$sample_identifiant), " doublets"),
"bad quality", "not doublet"))
doublets_compo = function(score1, score2) {
type1 = unlist(lapply(stringr::str_split(score1, pattern = "score_"), `[[`, 2))
type2 = unlist(lapply(stringr::str_split(score2, pattern = "score_"), `[[`, 2))
if (type1 == type2) {
the_title = "Homotypic doublet"
the_subtitle = type1
score1 = "log_nCount_RNA"
} else {
the_title = "Heterotypic doublet"
the_subtitle = paste(type1, type2, sep = " + ")
}
p = sobj@meta.data %>%
dplyr::arrange(desc(orig.ident.doublets)) %>%
ggplot2::ggplot(., aes(x = eval(parse(text = score1)),
y = eval(parse(text = score2)),
col = orig.ident.doublets)) +
ggplot2::geom_point(size = 0.25) +
ggplot2::scale_color_manual(values = c(sample_info$color, "gray90", "gray60"),
breaks = c(paste0(as.character(sample_info$sample_identifiant), " doublets"),
"bad quality", "not doublet")) +
ggplot2::labs(x = score1, y = score2,
title = the_title, subtitle = the_subtitle) +
ggplot2::theme_classic() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(p)
}
score_columns = grep(x = colnames(sobj@meta.data),
pattern = "^score",
value = TRUE)
combinations = expand.grid(score_columns, score_columns) %>%
apply(., 1, sort) %>% t() %>%
as.data.frame()
combinations = combinations[!duplicated(combinations), ]
plot_list = apply(combinations, 1, FUN = function(elem) {
doublets_compo(elem[1], elem[2])
})
sobj$orig.ident.doublets = NULL
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We could save this object before filtering (remove
eval = FALSE) :
saveRDS(sobj, paste0(out_dir, "/datasets/", sample_name, "_sobj_unfiltered.rds"))
We remove :
Note: We do not filter cells detected as doublets. Indeed, few genes and transcripts are detected per cell, and the best cells are therefore annotated as doublets.
sobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA,
fail_percent.mt, fail_percent.rb)))
sobj
## An object of class Seurat
## 32738 features across 5768 samples within 1 assay
## Active assay: RNA (32738 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We normalize the count matrix for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 32738 features across 5768 samples within 1 assay
## Active assay: RNA (32738 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We perform a PCA :
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE and a UMAP with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"),
check_duplicates = FALSE)
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We annotate cells for cell type, with the new normalized expression matrix :
score_columns = grep(x = colnames(sobj@meta.data), pattern = "^score", value = TRUE)
sobj@meta.data[, score_columns] = NULL
sobj$cell_type = NULL
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 80 38 57 53
## B cells cuticle cortex medulla
## 62 209 118 93
## IRS proliferative IBL ORS
## 186 230 734 704
## IFE HFSC melanocytes sebocytes
## 1503 1215 370 116
(Time to run : 25.13 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = "RNA_pca_20_tsne") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase :
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 4905 413 368
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 3260 246 249
## G2M 753 135 59
## S 892 32 60
(Time to run : 444.22 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
We make a highly resolutive clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = "RNA_pca", dims = c(1:ndims))
sobj = Seurat::FindClusters(sobj, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5768
## Number of edges: 196760
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7884
## Number of communities: 28
## Elapsed time: 0 seconds
table(sobj$seurat_clusters)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 755 660 484 391 356 274 266 258 229 200 178 169 159 155 155 142 126 125 117 98
## 20 21 22 23 24 25 26 27
## 89 77 67 59 58 56 48 17
We can visualize the cell type :
tsne = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_tsne"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_umap"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from Seurat :
tsne = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from cyclone :
tsne = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize the clustering :
tsne = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize all cell types markers on the tSNE :
markers = dotplot_markers %>% unlist() %>% unname()
markers = markers[markers %in% rownames(sobj)]
plot_list = lapply(markers,
FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, features = one_gene,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::labs(title = one_gene) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
We save the annotated and filtered Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/datasets/", sample_name, "_sobj_filtered.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 patchwork_1.1.2 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] ggvenn_0.1.9 here_1.0.1
## [59] TInGa_0.0.0.9000 ggraph_2.0.3
## [61] pkgconfig_2.0.3 GO.db_3.10.0
## [63] DelayedMatrixStats_1.8.0 gower_0.2.1
## [65] ggbeeswarm_0.6.0 iterators_1.0.12
## [67] DropletUtils_1.6.1 reticulate_1.26
## [69] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [71] circlize_0.4.15 beeswarm_0.4.0
## [73] GetoptLong_1.0.5 xfun_0.35
## [75] bslib_0.3.1 zoo_1.8-10
## [77] tidyselect_1.1.0 reshape2_1.4.4
## [79] purrr_0.3.4 ica_1.0-2
## [81] pcaPP_1.9-73 viridisLite_0.3.0
## [83] rtracklayer_1.46.0 rlang_1.0.2
## [85] hexbin_1.28.1 jquerylib_0.1.4
## [87] dyneval_0.9.9 glue_1.4.2
## [89] RColorBrewer_1.1-2 matrixStats_0.56.0
## [91] stringr_1.4.0 lava_1.6.7
## [93] europepmc_0.3 DESeq2_1.26.0
## [95] recipes_0.1.17 labeling_0.3
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] ComplexHeatmap_2.14.0 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 grid_3.6.3
## [303] lazyeval_0.2.2 Formula_1.2-3
## [305] tsne_0.1-3 crayon_1.3.4
## [307] MASS_7.3-54 pROC_1.16.2
## [309] viridis_0.5.1 dynparam_1.0.0
## [311] rpart_4.1-15 zinbwave_1.8.0
## [313] compiler_3.6.3 ggtext_0.1.0